Cryptocurrencies are quite trendy these days. Definition. You can buy them on many different platforms. But is the price the same everywhere?

library(tidyverse)
library(rmarkdown)    # You need this library to run this template.
library(epuRate)      # Install with devtools: install_github("holtzy/epuRate", force=TRUE)
library(DT)

1 - Call a platform API


Let’s say you want to harvest the Bitcoin price on the Kraken platform. You have to call its public API. You can do that in your browser by typing this URL:
https://api.kraken.com/0/public/Ticker?pair=BTCEUR

Description of the result.

Now let’s do the same using the R programming language:

# Recover the information: query Kraken's public Ticker endpoint for the
# BTCEUR pair. The response body is kept as returned by getURLContent().
library(RCurl)
adress <- "https://api.kraken.com/0/public/Ticker?pair=BTCEUR"
ticker <- getURLContent(adress)

# Make the format more readable: parse the JSON and keep the first entry of
# `result`. library() is used rather than require() so that a missing jsonlite
# installation stops here with a clear error instead of returning FALSE and
# failing later inside fromJSON().
library(jsonlite)
tmp <- fromJSON(ticker)$result[[1]]

# Flatten into a data frame: only the first element of a, b, c, l, h and v is
# kept, o is used as returned, volumeQuote and timestamp are left as NA.
result <- data.frame(
  ask = tmp$a[1],
  bid = tmp$b[1],
  last = tmp$c[1],
  open = tmp$o,
  low = tmp$l[1],
  high = tmp$h[1],
  volume = tmp$v[1],
  volumeQuote = NA,
  timestamp = NA
)
#datatable(result, rownames = FALSE, options = list(dom=TRUE) )
result

We can turn this piece of code into a function.

# Fetch the current ticker of one currency pair from Kraken's public API.
#
# time:     value stored as-is in the `time` column of the result.
# currency: pair code appended to the Ticker URL (e.g. "BTCEUR"); it is also
#           stored in the `symbol` column.
#
# Returns a data frame with the columns time, platform, ask, bid, last, open,
# low, high, volume, volumeQuote, timestamp and symbol. volumeQuote and
# timestamp are always NA.
get_kraken <- function(time, currency) {
  url <- paste0("https://api.kraken.com/0/public/Ticker?pair=", currency)

  # Download the JSON answer and keep the first entry of its `result` field
  pair_info <- fromJSON(getURLContent(url))$result[[1]]

  data.frame(
    time = time,
    platform = "Kraken",
    ask = pair_info$a[1],
    bid = pair_info$b[1],
    last = pair_info$c[1],
    open = pair_info$o,
    low = pair_info$l[1],
    high = pair_info$h[1],
    volume = pair_info$v[1],
    volumeQuote = NA,
    timestamp = NA,
    symbol = currency
  )
}

2 - Harvest 5 platforms for 5 currencies


I’ve created a text file containing one function like the one above for each platform. The file is hosted on my GitHub. We can source it like this:

# Load the per-platform harvesting functions by evaluating a remote R file
# from GitHub in the current session. The URL points at the `master` branch,
# not at a fixed commit, so the sourced code can change between runs.
# NOTE(review): presumably this file defines get_coinbase(), get_bitstamp(),
# get_bitfinex() and get_cex() used by the harvest loop - confirm.
# POTENTIAL PROBLEM WITH THE LOADING OF LIBRARIES
# NOTE(review): the sourced functions likely need their packages (e.g. RCurl,
# jsonlite) to be attached already - confirm against the remote file.
source("https://raw.githubusercontent.com/holtzy/Cryp-To/master/FUNCTIONS/Public_Market_Functions.R")

Now we are ready for an infinite loop! I’m going to run these functions for several platforms and several cryptocurrencies, roughly every 10 seconds.

# Initialize an empty result table with the 12 columns that get_kraken()
# returns (the other get_*() harvesters are expected to return the same ones).
Ticker <- as.data.frame(matrix(NA, 0, 12))
names(Ticker) <- c(
  "time", "platform", "ask", "bid", "last", "open", "low", "high",
  "volume", "volumeQuote", "timestamp", "symbol"
)

# Harvesters to call for each currency pair, in call order. This replaces one
# copy-pasted line per (platform, currency) combination.
harvest_plan <- list(
  BTCEUR = c("get_coinbase", "get_kraken", "get_bitstamp", "get_bitfinex", "get_cex"), # BITCOIN (BTC)
  ETHEUR = c("get_coinbase", "get_kraken", "get_bitstamp", "get_cex"),                 # ETHERUM (ETH)
  LTCEUR = c("get_coinbase", "get_kraken", "get_bitstamp"),                            # LITECOIN (LTC)
  BCHEUR = c("get_coinbase", "get_kraken", "get_bitstamp", "get_cex"),                 # BITCOINCASH (BCH)
  XRPEUR = c("get_kraken", "get_bitstamp", "get_cex")                                  # RIPPLE (XRP)
)

# Number of iterations between two saves of the table. The original condition
# was `num %% 1 == 0`, which saved on every iteration although the comment
# announced a save every ten loops.
save_every <- 10

# Run the analysis. The loop never ends on its own: stop it manually.
num <- 0
while (TRUE) {

  # Keep a trace of where we are in this loop. format() is needed because
  # cat() would otherwise print the time as a raw number of seconds.
  num <- num + 1
  cat("Let's start the iteration number: ", num, " at ", format(Sys.time()), "\n")

  for (symbol in names(harvest_plan)) {
    # One shared time stamp per currency so that platforms can be compared
    time <- Sys.time()

    for (harvester in harvest_plan[[symbol]]) {
      # Best effort: a failing call is reported by try() and skipped, it must
      # not stop the harvest. Only data frames are appended to the table.
      tmp <- try(do.call(harvester, list(time, symbol)))
      if (is.data.frame(tmp)) {
        Ticker <- rbind(Ticker, tmp)
      }
    }
  }

  # Every `save_every` loops, save the table to disk
  if (num %% save_every == 0) {
    save(Ticker, file = "../DATA/public_ticker_harvest.Rdata")
  }

}

I ran this script for X days (start and end dates still to be filled in). The resulting dataset is called public_ticker_harvest.Rdata and is available on GitHub.

3 - Visualize currency values


Let’s visualize the differences!

# Load the harvested table: this restores the `Ticker` object saved by the
# harvest loop
load(file = "../DATA/public_ticker_harvest.Rdata")
#Ticker <- Ticker %>% sample_n(200000)

# Number of records for each platform (rows) and each symbol (columns)
table(Ticker[["platform"]], Ticker[["symbol"]])
##           
##            BTCEUR ETHEUR LTCEUR BCHEUR XRPEUR
##   coinbase  42091  42092  42093  42093      0
##   Kraken    42088  42091  42091  42092  42088
##   Bitstamp  42085  42088  42085  42087  42085
##   Bitfinex  42078      0      0      0      0
##   Cex       42044  42031      0  42043  42042

A function that makes a plot for a specific currency:

# Draw an interactive dygraph of the `last` price of one currency, with one
# series per platform.
#
# currency: value compared with the `symbol` column of the global `Ticker`
#           table (e.g. "BTCEUR").
#
# Returns the dygraph widget, with an always-visible legend and a range
# selector.
plot_ticker <- function(currency) {

  # Dygraph needs a wide table: keep time / platform / last for the requested
  # symbol, then put one platform per column
  one_symbol <- filter(Ticker, symbol == currency)
  long <- select(one_symbol, time, platform, last)
  wide <- spread(long, platform, last)

  # Build the xts object: every column except the first one (time) holds the
  # values, the time column gives the ordering
  library(xts)
  series <- xts(x = wide[, -1], order.by = wide$time)

  #time_series <- xts(tmp)
  library(dygraphs)
  graph <- dygraph(series)
  graph <- dyLegend(graph, show = "always", hideOnMouseOut = FALSE)
  #dyRoller(rollPeriod = 5) %>%
  dyRangeSelector(graph)
}

3.1 BTC

# Last price of the BTCEUR pair on every platform
plot_ticker(currency = "BTCEUR")

3.2 ETH

# Last price of the ETHEUR pair on every platform
plot_ticker(currency = "ETHEUR")

3.3 LTC

# Last price of the LTCEUR pair on every platform
plot_ticker(currency = "LTCEUR")

3.4 XRP

# Last price of the XRPEUR pair on every platform
plot_ticker(currency = "XRPEUR")

3.5 BCH

# Last price of the BCHEUR pair on every platform
plot_ticker(currency = "BCHEUR")

4 - Difference

Can we try to quantify the difference?

# Highest and lowest `last` price between Bitstamp and Kraken, for every time
# stamp and symbol, and their absolute / relative (in % of the max) difference.
# ungroup() drops the grouping that summarise() leaves behind: without it the
# mutate() runs once per remaining group and the result stays a grouped table.
diff <- Ticker %>%
  filter(platform %in% c("Bitstamp", "Kraken")) %>%
  group_by(time, symbol) %>%
  summarise(max = max(as.numeric(last)), min = min(as.numeric(last))) %>%
  ungroup() %>%
  mutate(diff = max - min, diff_perc = (max - min) / max * 100)

# One panel per symbol; the y axis is limited to the 0-4 range
ggplot(diff, aes(x = time, y = diff_perc, group = symbol, fill = symbol)) +
  geom_area() +
  facet_wrap(~symbol, nrow = 5) +
  theme(legend.position = "none") +
  ylim(0, 4)

# A function that calculates the difference between 2 platforms for every
# currency at each time stamp.
#
# plat1, plat2: platform names, matched exactly against the `platform` column
#               of the global `Ticker` table.
#
# Returns one row per (time, symbol) with the columns plat1_ask, plat1_bid,
# plat2_ask, plat2_bid and:
#   diff1     = (plat1_bid - plat2_ask) / plat1_bid * 100
#   diff2     = (plat2_bid - plat1_ask) / plat2_bid * 100
#   diff_perc = the larger of diff1 and diff2 (NA if either one is NA)
find_differences <- function(plat1, plat2) {
  Ticker %>%
    filter(platform %in% c(plat1, plat2)) %>%
    select(time, platform, symbol, ask, bid) %>%
    mutate(ask = as.numeric(ask), bid = as.numeric(bid)) %>%
    # Long format: one row per (time, platform, symbol, ask-or-bid)
    gather(temp, value, -time, -platform, -symbol) %>%
    # Rename the platforms to the generic labels "plat1" / "plat2". After the
    # filter above every row is one of the two, so an exact comparison is
    # enough and avoids treating the platform names as regular expressions.
    mutate(platform = ifelse(platform == plat1, "plat1", "plat2")) %>%
    unite(temp1, platform, temp, sep = "_") %>%
    # Wide format: columns plat1_ask, plat1_bid, plat2_ask, plat2_bid
    spread(key = temp1, value = value) %>%
    mutate(
      diff1 = (plat1_bid - plat2_ask) / plat1_bid * 100,
      diff2 = (plat2_bid - plat1_ask) / plat2_bid * 100,
      # pmax() is the vectorised row-wise maximum: same values as
      # rowwise() + max(), without the per-row overhead and without handing a
      # rowwise data frame back to the caller
      diff_perc = pmax(diff1, diff2)
    )
}

# Ask/bid differences between Bitstamp and Kraken, for every currency and
# time stamp
diff <- find_differences(plat1 = "Bitstamp", plat2 = "Kraken")

# One panel per currency, without legend; the y axis is limited to the 0-4 range
ggplot(data = diff, mapping = aes(x = time, y = diff_perc, group = symbol, fill = symbol)) +
  geom_area() +
  facet_wrap(~symbol, nrow = 5) +
  ylim(0, 4) +
  theme(legend.position = "none")

5 - Significant difference

What interests us is the number of times the difference between 2 platforms reaches a threshold that allows arbitrage. Let’s try to quantify how many times we reach this threshold for every currency between Kraken and Bitstamp.

# A function that counts the number of significant differences for several
# thresholds.
#
# diff:       table with at least the columns `symbol` and `diff_perc`.
# thresholds: numeric vector of thresholds to test; defaults to the values
#             that were previously hard-coded (0.7 to 3 by steps of 0.1).
#
# Returns one row per (threshold, symbol) with the columns symbol,
# nb_over_thres (number of rows whose diff_perc is strictly above the
# threshold) and thres. Symbols with no row above a threshold are absent for
# that threshold. Within a threshold, rows are sorted by increasing count.
find_signif_diff <- function(diff, thresholds = seq(0.7, 3, 0.1)) {
  # One small table per threshold, bound together once at the end instead of
  # growing the result with rbind() inside a loop
  per_threshold <- lapply(thresholds, function(threshold) {
    diff %>%
      group_by(symbol) %>%
      filter(diff_perc > threshold) %>%
      summarise(nb_over_thres = n()) %>%
      mutate(thres = threshold) %>%
      arrange(nb_over_thres)
  })
  bind_rows(per_threshold)
}

# Count, for each threshold and currency, how often the difference is exceeded
nbSignifDiff <- find_signif_diff(diff = diff)

# One line per currency: number of cases as a function of the threshold
ggplot(data = nbSignifDiff, mapping = aes(x = thres, y = nb_over_thres, group = symbol, color = symbol)) +
  geom_line() +
  labs(x = "Difference threshold (%)", y = "Number of cases in x days")

Now, let’s make this calculation for every pair of platforms.

# Find all the pairs of platforms: one pair per column of `list_pairs`
list_platforms <- Ticker$platform %>% unique() %>% as.character()
list_pairs <- combn(list_platforms, 2)

# Count the significant differences for every pair. The per-pair tables are
# stored in a preallocated list and bound once at the end, instead of growing
# `bilan` with rbind() at each iteration. seq_len() replaces 1:ncol(), which
# would iterate over c(1, 0) if there were no column.
bilan_parts <- vector("list", ncol(list_pairs))
for (i in seq_len(ncol(list_pairs))) {
  diff <- find_differences(list_pairs[1, i], list_pairs[2, i])
  nbSignifDiff <- find_signif_diff(diff)
  # Label the rows with the pair they belong to, e.g. "Kraken-Bitstamp"
  nbSignifDiff$pair <- paste(list_pairs[1, i], list_pairs[2, i], sep = "-")
  bilan_parts[[i]] <- nbSignifDiff
}
bilan <- bind_rows(bilan_parts)

# One panel per pair of platforms, one line per currency
ggplot(bilan, aes(x = thres, y = nb_over_thres, group = symbol, color = symbol)) +
  geom_line() +
  ylab("Number of cases in x days") +
  xlab("Difference threshold (%)") +
  facet_wrap(~pair, ncol = 5)

6 - A look at the volumes

# Volume over time, one line per platform and one panel per symbol.
# `scales` is spelled out in full: the original `scale = "free"` only worked
# through partial argument matching. "free" gives each panel its own x and y
# axis ranges.
Ticker %>%
  ggplot(aes(x = time, y = as.numeric(volume), color = platform)) +
    geom_line() +
    facet_wrap(~symbol, scales = "free")
## Warning: Removed 168369 rows containing missing values (geom_path).

7 - Conclusion

 

A work by Yan Holtz

Yan.holtz.data@gmail.com